For this assignment, we are going to work with Census data and with US health data from: http://ghdx.healthdata.org/us-data . For the census data, we are going to use the American Community Survey (ACS). The 2017 ACS variables are described here: https://api.census.gov/data/2017/acs/acs5/variables.html . We are also going to use the GDP data from the BEA, as well as the Infectious Disease Mortality Rates by County data set from: http://ghdx.healthdata.org/record/ihme-data/united-states-infectious-disease-mortality-rates-county-1980-2014
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from census import Census
from us import states
import plotly.figure_factory as ff
import plotly.graph_objects as go
After installing geopandas and plotly, the install commands below are left commented out.
##conda install geopandas pyshp shapely
##conda install -c plotly plotly plotly-geo
%matplotlib inline
c = Census('')
Using a loop over the file names to load all the per-state files and concatenate them.
# The per-state IHME mortality files (50 states plus DC) share one naming
# pattern; only the upper-case state token in the middle differs.
state_tokens = [
    'ALABAMA', 'ALASKA', 'ARIZONA', 'ARKANSAS', 'CALIFORNIA', 'COLORADO',
    'CONNECTICUT', 'DELAWARE', 'DISTRICT_OF_COLUMBIA', 'FLORIDA', 'GEORGIA',
    'HAWAII', 'IDAHO', 'ILLINOIS', 'INDIANA', 'IOWA', 'KANSAS', 'KENTUCKY',
    'LOUISIANA', 'MAINE', 'MARYLAND', 'MASSACHUSETTS', 'MICHIGAN', 'MINNESOTA',
    'MISSISSIPPI', 'MISSOURI', 'MONTANA', 'NEBRASKA', 'NEVADA', 'NEW_HAMPSHIRE',
    'NEW_JERSEY', 'NEW_MEXICO', 'NEW_YORK', 'NORTH_CAROLINA', 'NORTH_DAKOTA',
    'OHIO', 'OKLAHOMA', 'OREGON', 'PENNSYLVANIA', 'RHODE_ISLAND',
    'SOUTH_CAROLINA', 'SOUTH_DAKOTA', 'TENNESSEE', 'TEXAS', 'UTAH', 'VERMONT',
    'VIRGINIA', 'WASHINGTON', 'WEST_VIRGINIA', 'WISCONSIN', 'WYOMING',
]
files = ['IHME_USA_COUNTY_INFECT_DIS_MORT_1980_2014_' + token + '_Y2018M03D27.csv'
         for token in state_tokens]
# Read every file in list order and stack the rows into one frame.
frames = []
for path in files:
    frames.append(pd.read_csv(path))
df = pd.concat(frames)
df.head()
State-level FIPS codes run from 1 to 56 and county-level FIPS codes are greater than 56, so we use that rule to split the state-level and county-level data.
# Rows whose FIPS code is at most 56 are treated as state-level records.
is_state_row = df['FIPS'] <= 56
d_state = df.loc[is_state_row]
We consider only state-level mortality rate data for the year 2014 and for 'both' sexes, so we filter the data accordingly.
# sex_id 3 selects the combined ("both") sex rows; then keep only year 2014.
d_state_both = d_state.loc[d_state['sex_id'] == 3]
d_state_2014 = d_state_both[d_state_both.year_id == 2014]
d_state_2014.head(10)
# One histogram of the mortality rate (mx) per cause, each panel with its own axes.
g = sns.FacetGrid(
    d_state_2014,
    col="cause_name",
    col_wrap=3,
    sharex=False,
    sharey=False,
).map(plt.hist, "mx")
Pivoting this dataframe for later use.
# Wide tables of the 2014 rates (one row per state, one column per cause),
# keyed once by state name and once by state FIPS code.
pivot_args = dict(columns='cause_name', values='mx')
d_stateName_14 = d_state_2014.pivot(index='location_name', **pivot_args)
d_state_14 = d_state_2014.pivot(index='FIPS', **pivot_args)
d_state_14.head(6)
Numerical description (mortality rate for diseases on year 2014 by state):
d_state_14.describe()
# Everything above the largest state code (56) is a county-level record.
d_county = df[df['FIPS'].gt(56)]
We consider only county-level mortality rate data for the year 2014 and for 'both' sexes, so we filter the data accordingly.
# Keep the 2014, both-sexes (sex_id 3) county rows with one combined filter.
county_mask = (d_county['year_id'] == 2014) & (d_county['sex_id'] == 3)
d_county_2014 = d_county.loc[county_mask]
d_county_2014.tail(6)
# Per-cause histograms of the county mortality rate (mx), axes not shared.
gc = sns.FacetGrid(
    d_county_2014,
    col="cause_name",
    col_wrap=3,
    sharex=False,
    sharey=False,
).map(plt.hist, "mx")
Pivoting this dataframe:
# Reshape to one row per county FIPS and one column per cause (2014 rates).
county_pivot_spec = {'index': 'FIPS', 'columns': 'cause_name', 'values': 'mx'}
d_county_14 = d_county_2014.pivot(**county_pivot_spec)
d_county_14.head(6)
Numerical description (mortality rate for diseases in year 2014 by county):
# Summary statistics of the 2014 county-level mortality rates, one column per cause.
d_county_14.describe()
Loading file and formatting gdp data:
# header=3: the fourth row of the sheet (0-indexed row 3) supplies the column labels.
gdp = pd.read_excel('GCP_Release_1.xlsx', header=3)
gdp.head()
Renaming the unnamed leading columns and keeping only the 'All Industries' rows:
# Give the unlabeled leading columns meaningful names.
gdp = gdp.rename(columns={
    'Unnamed: 0': 'FIPS',
    'Unnamed: 1': 'County',
    'Unnamed: 2': 'State',
    'Unnamed: 3': 'LineCode'
})
# Keep only the LineCode 1 rows. The explicit .copy() makes the result an
# independent frame, so the FIPS assignment below writes to it directly instead
# of to a slice of the original (which raises pandas' SettingWithCopyWarning).
gdp = gdp[gdp['LineCode'] == 1.0].copy()
gdp.info()
# Store FIPS as 64-bit integers so it can be used in integer arithmetic and joins.
gdp['FIPS'] = gdp['FIPS'].astype('i8')
gdp.head()
Formatting the FIPS code to get the state FIPS code, then grouping by state FIPS code to get state-level GDP.
# Integer-dividing the county FIPS by 1000 leaves the state FIPS prefix;
# summing the 2014 column within each prefix gives the state-level total.
state_fips = gdp['FIPS'] // 1000
gdp_with_state = gdp.assign(StateFIPS=state_fips)
state_gdp = gdp_with_state.groupby('StateFIPS')[[2014]].sum()
state_gdp.head()
Numerical description of 2014 State GDP:
# Summary statistics of the per-state 2014 GDP totals.
state_gdp.describe()
Histogram of 2014 State GDP:
state_gdp.hist()
# Overlay the mean (solid yellow) and median (dotted red) of the 2014 state GDP.
state_gdp_2014 = state_gdp[2014]
for stat_value, stat_color, stat_style, stat_label in (
    (state_gdp_2014.mean(), 'yellow', '-', 'Mean'),
    (state_gdp_2014.median(), 'red', ':', 'Median'),
):
    plt.axvline(stat_value, color=stat_color, linestyle=stat_style, label=stat_label)
plt.legend()
# County-level view: identifying columns plus the 2014 GDP column.
county_columns = ['FIPS', 'County', 'State', 2014]
county_gdp = gdp[county_columns]
county_gdp.head()
Numerical description of 2014 county GDP:
# The full describe() call is disabled; only the mean of the 2014 county GDP
# column is computed here.
#county_gdp.describe()
county_gdp[2014].mean()
Histogram of 2014 county GDP:
# Histogram of 2014 county GDP with mean/median markers, drawn on log-log axes.
county_gdp_2014 = county_gdp[2014]
county_gdp_2014.hist()
for stat_value, stat_color, stat_style, stat_label in (
    (county_gdp_2014.mean(), 'yellow', '-', 'Mean'),
    (county_gdp_2014.median(), 'red', ':', 'Median'),
):
    plt.axvline(stat_value, color=stat_color, linestyle=stat_style, label=stat_label)
for set_axis_scale in (plt.xscale, plt.yscale):
    set_axis_scale('log')
plt.legend()
B00001_001E:an estimate of the total population of a region
C27017_001E:total
C27017_002E:population under the poverty line
B18135_020E : Health Insurance coverage for age 19-64 with no disability
Loading ACS data for state level:
# Request the ACS variables for every state ('*') for 2014.
state_fields = ('NAME', 'B00001_001E', 'C27017_001E', 'C27017_002E', 'B18135_020E')
state_records = c.acs.state(state_fields, '*', year=2014)
st_pop = pd.DataFrame.from_records(state_records)
st_pop.head()
st_pop.info()
# Swap the ACS variable codes for readable names; the 'state' column returned
# by the API becomes the FIPS join key.
readable_state_names = {
    'B00001_001E': 'Est.Total',
    'C27017_001E': 'Total_pop',
    'C27017_002E': 'Under_poverty',
    'B18135_020E': 'Health_Insurance',
    'state': 'FIPS',
}
st_pop = st_pop.rename(columns=readable_state_names)
# Cast the FIPS key to 64-bit integers.
st_pop['FIPS'] = st_pop['FIPS'].astype('int64')
st_pop.head()
Computing the fraction of the population below the poverty line and the fraction of the population with health insurance coverage.
# Both fractions use the same denominator, Total_pop.
state_total = st_pop['Total_pop']
st_pop['Fraction'] = st_pop['Under_poverty'].div(state_total)
st_pop['Health_ins_frac'] = st_pop['Health_Insurance'].div(state_total)
st_pop.head()
# For each column: summary statistics, then a histogram with the mean
# (solid yellow) and median (dotted red) marked.
for state_column in ('Total_pop', 'Health_ins_frac'):
    state_series = st_pop[state_column]
    state_series.describe()
    state_series.hist()
    plt.axvline(state_series.mean(), color='yellow', linestyle='-', label='Mean')
    plt.axvline(state_series.median(), color='red', linestyle=':', label='Median')
    plt.legend()
Loading ACS data for county level:
# Request the ACS variables for every county ('*') of every state ('*') for 2014.
county_fields = ('NAME', 'C27017_001E', 'C27017_002E', 'B18135_020E')
county_records = c.acs.state_county(county_fields, '*', '*', year=2014)
cnt_pop = pd.DataFrame.from_records(county_records)
cnt_pop.sort_values('state').head()
cnt_pop.info()
# Readable column names. 'B00001_001E' is not among the requested fields above,
# so that entry simply matches nothing here.
readable_county_names = {
    'B00001_001E': 'Est.Total',
    'C27017_001E': 'Total_pop',
    'C27017_002E': 'Under_poverty',
    'B18135_020E': 'Health_Insurance',
    'county': 'FIPS',
    'state': 'st_FIPS',
}
cnt_pop = cnt_pop.rename(columns=readable_county_names)
# Cast both code columns to 32-bit integers.
cnt_pop = cnt_pop.astype({'FIPS': 'int32', 'st_FIPS': 'int32'})
cnt_pop.head()
Computing the fraction of the population below the poverty line and the fraction of the population with health insurance coverage.
# Both fractions use the same denominator, Total_pop.
county_total = cnt_pop['Total_pop']
cnt_pop['Fraction'] = cnt_pop['Under_poverty'].div(county_total)
cnt_pop['Health_Ins_Frac'] = cnt_pop['Health_Insurance'].div(county_total)
cnt_pop.head()
# County population: histogram on log-log axes with mean/median markers.
county_total.describe()
county_total.hist()
plt.axvline(county_total.mean(), color='yellow', linestyle='-', label='Mean')
plt.axvline(county_total.median(), color='red', linestyle=':', label='Median')
plt.xscale('log')
plt.yscale('log')
plt.legend()
# Insured fraction: histogram on the default axes with the same markers.
county_insured = cnt_pop['Health_Ins_Frac']
county_insured.describe()
county_insured.hist()
plt.axvline(county_insured.mean(), color='yellow', linestyle='-', label='Mean')
plt.axvline(county_insured.median(), color='red', linestyle=':', label='Median')
plt.legend()
In this part, we are going to display the state- and county-level Tuberculosis and Lower Respiratory mortality rates on maps, using Plotly’s Choropleth feature.
Taking the state-name-indexed dataframe and adding a column with each state's postal code.
d_stateName_14.head(6)
# Postal codes listed in alphabetical order of state name (DC after Delaware).
# NOTE(review): assign() attaches them by row position, so this relies on the
# pivoted index being ordered alphabetically by location_name -- confirm.
postal_codes = [
    'AL', 'AK', 'AZ', 'AR', 'CA', 'CO', 'CT', 'DE', 'DC',
    'FL', 'GA', 'HI', 'ID', 'IL', 'IN',
    'IA', 'KS', 'KY', 'LA', 'ME', 'MD', 'MA', 'MI', 'MN', 'MS', 'MO', 'MT',
    'NE', 'NV', 'NH', 'NJ', 'NM', 'NY', 'NC', 'ND', 'OH',
    'OK', 'OR', 'PA', 'RI', 'SC', 'SD', 'TN',
    'TX', 'UT', 'VT', 'VA', 'WA', 'WV', 'WI', 'WY',
]
d_stateName_14 = d_stateName_14.assign(code=postal_codes)
# Turn the state-name index back into a column and keep what the maps need.
new_stName = d_stateName_14.reset_index()
state_map_columns = ['location_name', 'code', 'Tuberculosis', 'Lower respiratory infections']
sp2_st_14 = new_stName[state_map_columns]
sp2_st_14.head()
# State choropleth of the 2014 Tuberculosis mortality rate: build the trace
# first, then wrap it in a figure restricted to the USA.
tb_state_trace = go.Choropleth(
    locations=sp2_st_14['code'],                # two-letter state codes
    z=sp2_st_14['Tuberculosis'].astype(float),  # value that drives the color
    locationmode='USA-states',
    colorscale='Reds',
    colorbar_title="Tuberculosis mortality rates",
)
fig = go.Figure(data=tb_state_trace)
fig.update_layout(title_text='Tuberculosis mortality rates by states', geo_scope='usa')
fig.show()
# State choropleth of the 2014 Lower respiratory infections mortality rate.
lri_trace_options = {
    'locations': sp2_st_14['code'],                                  # two-letter state codes
    'z': sp2_st_14['Lower respiratory infections'].astype(float),    # value that drives the color
    'locationmode': 'USA-states',
    'colorscale': 'Greens',
    'colorbar_title': "Lower Respiratory Infections mortality rates",
}
fig = go.Figure(data=go.Choropleth(**lri_trace_options))
lri_layout_options = {
    'title_text': 'Lower Respiratory Infections mortality rates by states',
    'geo_scope': 'usa',  # limit the map to the USA
}
fig.update_layout(**lri_layout_options)
fig.show()
d_county_14.head(6)
# Move the FIPS index back into a regular column.
new_cnt = d_county_14.reset_index()
# Keep the FIPS key plus the two causes that get mapped. The explicit .copy()
# makes sp_cnt_14 an independent frame: its FIPS column is reassigned further
# down, and writing into a column subset of new_cnt would otherwise raise
# pandas' SettingWithCopyWarning.
sp_cnt_14 = new_cnt[['FIPS', 'Tuberculosis', 'Lower respiratory infections']].copy()
sp_cnt_14.head()
# County choropleth of the 2014 Tuberculosis mortality rate.
# County FIPS identifiers are five characters (state prefix + county code), so
# left-pad with zeros to width 5 (e.g. 1001 -> '01001'); the previous width of 3
# never padded anything. assign() returns a new frame rather than writing into
# a slice of another frame.
sp_cnt_14 = sp_cnt_14.assign(FIPS=sp_cnt_14['FIPS'].astype(str).str.zfill(5))
values = sp_cnt_14['Tuberculosis'].tolist()
fips = sp_cnt_14['FIPS'].tolist()
# No custom colorscale is passed: the five-color list that used to be defined
# here was never handed to create_choropleth, so the default colors were (and
# still are) used.
fig = ff.create_choropleth(
    fips=fips, values=values,
    county_outline={'color': 'rgb(255,255,255)', 'width': 0.5},
    legend_title='Tuberculosis mortality rate per county'
)
# Shift the legend and its title annotation to the left edge of the figure.
fig.update_layout(
    legend_x = 0,
    annotations = {'x': -0.12, 'xanchor': 'left'}
)
fig.layout.template = None
fig.show()
# County choropleth of the 2014 Lower respiratory infections mortality rate.
# County FIPS identifiers are five characters (state prefix + county code), so
# left-pad with zeros to width 5 (e.g. 1001 -> '01001'); the previous width of 3
# never padded anything. The operation is idempotent, and assign() returns a
# new frame rather than writing into a slice of another frame.
sp_cnt_14 = sp_cnt_14.assign(FIPS=sp_cnt_14['FIPS'].astype(str).str.zfill(5))
values = sp_cnt_14['Lower respiratory infections'].tolist()
fips = sp_cnt_14['FIPS'].tolist()
# No custom colorscale is passed: the five-color list that used to be defined
# here was never handed to create_choropleth, so the default colors were (and
# still are) used.
fig = ff.create_choropleth(
    fips=fips, values=values,
    county_outline={'color': 'rgb(255,255,255)', 'width': 0.5},
    legend_title='Lower respiratory infections mortality rate per county'
)
# Shift the legend and its title annotation to the left edge of the figure.
fig.update_layout(
    legend_x = 0,
    annotations = {'x': -0.12, 'xanchor': 'left'}
)
fig.layout.template = None
fig.show()
In this part, we show the change in state-level mortality rate (latest year minus earliest year) for Tuberculosis, Lower Respiratory Infections, and HIV/AIDS on a map.
# Narrow the both-sexes state data to the columns needed for the change maps.
change_columns = ['FIPS', 'location_name', 'cause_name', 'cause_id',
                  'year_id', 'sex', 'mx']
dt_state = d_state_both[change_columns]
# Keep only the three causes of interest, selected by cause_id.
# NOTE(review): presumably these ids are Tuberculosis, HIV/AIDS and Lower
# respiratory infections (the columns used by the maps below) -- confirm.
dt_state = dt_state[dt_state['cause_id'].isin([297, 298, 322])]
Taking only 2014 data:
# 2014 rows only, with mx renamed to mx_2014. Selecting rows and columns in one
# .loc call and renaming without inplace=True yields a fresh frame; the earlier
# inplace rename acted on a filtered slice of dt_state and raised pandas'
# SettingWithCopyWarning.
dt_state_2014 = (
    dt_state
    .loc[dt_state['year_id'] == 2014, ['FIPS', 'location_name', 'cause_name', 'mx']]
    .rename(columns={'mx': 'mx_2014'})
)
dt_state_2014.head()
Taking only 1980 data:
# 1980 rows only, with mx renamed to mx_1980. Selecting rows and columns in one
# .loc call and renaming without inplace=True yields a fresh frame; the earlier
# inplace rename acted on a filtered slice of dt_state and raised pandas'
# SettingWithCopyWarning.
dt_state_1980 = (
    dt_state
    .loc[dt_state['year_id'] == 1980, ['FIPS', 'location_name', 'cause_name', 'mx']]
    .rename(columns={'mx': 'mx_1980'})
)
dt_state_1980.head()
Joining 1980 and 2014 mortality rate data and taking the difference.
# Outer-join the 1980 and 2014 rates on state and cause, then compute the
# change as 2014 minus 1980.
merge_keys = ['FIPS', 'location_name', 'cause_name']
dt_state_joined = dt_state_1980.merge(dt_state_2014, how='outer', on=merge_keys)
dt_state_joined = dt_state_joined.assign(
    mx_diff=dt_state_joined['mx_2014'].sub(dt_state_joined['mx_1980'])
)
dt_state_joined = dt_state_joined[merge_keys + ['mx_1980', 'mx_2014', 'mx_diff']]
dt_state_joined.head(6)
Pivoting the data frame and then adding location postal code.
# One row per state and one column per cause holding the 1980-2014 change,
# with the state name restored as an ordinary column.
dt_state_joined_pivoted = (
    dt_state_joined
    .pivot(index='location_name', columns='cause_name', values='mx_diff')
    .reset_index()
)
dt_state_joined_pivoted.head(6)
# Postal codes listed in alphabetical order of state name (DC after Delaware).
# NOTE(review): assign() attaches them by row position, so this relies on the
# pivoted rows being ordered alphabetically by location_name -- confirm.
change_map_codes = [
    'AL', 'AK', 'AZ', 'AR', 'CA', 'CO', 'CT', 'DE', 'DC',
    'FL', 'GA', 'HI', 'ID', 'IL', 'IN',
    'IA', 'KS', 'KY', 'LA', 'ME', 'MD', 'MA', 'MI', 'MN', 'MS', 'MO', 'MT',
    'NE', 'NV', 'NH', 'NJ', 'NM', 'NY', 'NC', 'ND', 'OH',
    'OK', 'OR', 'PA', 'RI', 'SC', 'SD', 'TN',
    'TX', 'UT', 'VT', 'VA', 'WA', 'WV', 'WI', 'WY',
]
dt_state_joined_pivoted = dt_state_joined_pivoted.assign(code=change_map_codes)
dt_state_joined_pivoted.head(6)
# State choropleth of the change in Tuberculosis mortality rate (2014 - 1980).
fig = go.Figure(data=go.Choropleth(
    locations=dt_state_joined_pivoted['code'],  # two-letter state codes
    z=dt_state_joined_pivoted['Tuberculosis'].astype(float),  # value that drives the color
    locationmode='USA-states',  # interpret `locations` as US state codes
    colorscale='Greens',
    colorbar_title="Tuberculosis Rate change",  # fixed typo: was "chane"
))
fig.update_layout(
    title_text='Tuberculosis mortality rate change by State over 1980-2014',
    geo_scope='usa',  # limit the map to the USA
)
fig.show()
# State choropleth of the change in HIV/AIDS mortality rate (2014 - 1980).
fig = go.Figure(data=go.Choropleth(
    locations=dt_state_joined_pivoted['code'],  # two-letter state codes
    z=dt_state_joined_pivoted['HIV/AIDS'].astype(float),  # value that drives the color
    locationmode='USA-states',  # interpret `locations` as US state codes
    colorscale='Reds',
    colorbar_title="HIV/AIDS Rate change",  # fixed typo: was "chane"
))
fig.update_layout(
    title_text='HIV/AIDS mortality rate change by State over 1980-2014',
    geo_scope='usa',  # limit the map to the USA
)
fig.show()
# State choropleth of the change in Lower respiratory infections mortality
# rate (2014 - 1980).
fig = go.Figure(data=go.Choropleth(
    locations=dt_state_joined_pivoted['code'],  # two-letter state codes
    z=dt_state_joined_pivoted['Lower respiratory infections'].astype(float),  # value that drives the color
    locationmode='USA-states',  # interpret `locations` as US state codes
    colorscale='Blues',
    colorbar_title="Lower respiratory infections Rate change",  # fixed typo: was "chane"
))
fig.update_layout(
    title_text='Lower respiratory infections mortality rate change by State over 1980-2014',
    geo_scope='usa',  # limit the map to the USA
)
fig.show()
Loading United states Data file of mortality rates of diseases and splitting data for both sex:
# National-level file; keep the combined-sex rows (sex_id 3) and only the
# columns needed for the time-series plots.
d_us = pd.read_csv('IHME_USA_COUNTY_INFECT_DIS_MORT_1980_2014_UNITED_STATES_Y2018M03D27.csv')
national_columns = ['location_name', 'cause_name', 'year_id', 'mx']
d_us_both = d_us.loc[d_us['sex_id'] == 3, national_columns]
d_us_both.head()
Plotting this data:
# One mortality-rate-over-time line plot per cause, each panel with its own axes.
g1 = sns.FacetGrid(
    d_us_both,
    col="cause_name",
    col_wrap=3,
    sharex=False,
    sharey=False,
).map(plt.plot, "year_id", "mx")
Formatting the United States mortality rate data and taking the rows for males and females:
# Everything except the combined-sex rows (sex_id 3), i.e. the per-sex series.
per_sex_columns = ['location_name', 'cause_name', 'sex', 'sex_id', 'year_id', 'mx']
d_us_sex = d_us.loc[d_us['sex_id'] != 3, per_sex_columns]
d_us_sex.tail()
Plotting this data:
# One panel per cause with a separate line per sex (hue).
gx = sns.FacetGrid(d_us_sex, col="cause_name", hue='sex', col_wrap=3, sharex=False, sharey=False)
gx = gx.map(plt.plot, "year_id", "mx")
# Use the grid's own legend so a single figure-level legend describes every
# panel; plt.legend() only attached a legend to the last-drawn facet.
gx.add_legend()
In this section, we are going to look at the relationship between Lower Respiratory mortality rate and each of the Total population, Fraction of the population below the poverty line, and Fraction of the population with health insurance coverage, at both state and county levels. So, we are going to join the population and mortality-rate dataframes built earlier.
st_pop.head()
d_state_14.head()
# Move the FIPS index back into a regular column so it can act as a join key.
d_state_14 = d_state_14.reset_index()
# The explicit .copy() makes sp_st_14 an independent frame, so the FIPS cast
# below does not write into a column subset of d_state_14 (which raises
# pandas' SettingWithCopyWarning).
sp_st_14 = d_state_14[['FIPS', 'Tuberculosis', 'Lower respiratory infections']].copy()
sp_st_14.head()
sp_st_14.info()
sp_st_14['FIPS'] = sp_st_14['FIPS'].astype('i4')
# Left join: keep every state with a mortality rate and attach its ACS figures.
joined_st = pd.merge(sp_st_14, st_pop, how='left', on=['FIPS'])
joined_st.head()
joined_st = joined_st[['FIPS', 'NAME', 'Total_pop', 'Under_poverty', 'Fraction',
                       'Health_Insurance', 'Health_ins_frac', 'Lower respiratory infections']]
joined_st.head()
# State-level scatter plots of the Lower respiratory infections rate against
# population, poverty fraction and insured fraction.
lri_axis = "Lower respiratory infections"
g_pop = sns.relplot(data=joined_st, x=lri_axis, y="Total_pop")
g_fraction = sns.relplot(data=joined_st, x=lri_axis, y="Fraction")
g_fraction = sns.relplot(data=joined_st, x=lri_axis, y="Health_ins_frac")
# Quick look at the county ACS table before building its join key.
cnt_pop.sort_values(by='st_FIPS').head()
cnt_pop.info()
Formatting the county-level FIPS code so that it matches the mortality data:
# Build the combined county code as state code * 1000 + county code.
# st_FIPS was already cast to an integer type when cnt_pop was loaded, so no
# further conversion is needed here.
state_part = cnt_pop['st_FIPS'].mul(1000)
cnt_pop['FIPS'] = state_part.add(cnt_pop['FIPS'])
cnt_pop.info()
sp_cnt_14.head()
sp_cnt_14.info()
Joining the two dataframes on FIPS:
# Cast the mortality-side FIPS key to 32-bit integers before joining. assign()
# returns a new frame, so nothing is written into a column subset of another
# frame (which raises pandas' SettingWithCopyWarning).
sp_cnt_14 = sp_cnt_14.assign(FIPS=sp_cnt_14['FIPS'].astype('i4'))
sp_cnt_14.info()
# Left join: keep every county with a mortality rate and attach its ACS figures.
joined_cnt = pd.merge(sp_cnt_14, cnt_pop, how='left', on=['FIPS'])
joined_cnt.head()
joined_cnt = joined_cnt[['FIPS', 'NAME', 'Total_pop', 'Under_poverty', 'Fraction',
                         'Health_Insurance', 'Health_Ins_Frac', 'Lower respiratory infections']]
joined_cnt.head(50)
# County-level scatter plots of the Lower respiratory infections rate against
# population, poverty fraction and insured fraction.
gc_pop = sns.relplot(x="Lower respiratory infections", y="Total_pop", data=joined_cnt)
gc_fraction = sns.relplot(x="Lower respiratory infections", y="Fraction", data=joined_cnt)
g_fraction = sns.relplot(x="Lower respiratory infections", y="Health_Ins_Frac", data=joined_cnt)